rm(list = ls(all.names = TRUE))
gc()
set.seed(211917)
options(scipen=999)

packages <-c("tidyverse","foreign","stargazer","naniar","ri2")

new.packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

lapply(packages, require, character.only = TRUE)
rm(packages, new.packages)

setwd("PUT YOUR WORKING DIRECTORY HERE")

# load data
dat <- read.dta("./data.dta")

#---------------#
# Data cleaning #
#---------------#

dat$q6_years_married[dat$q6_years_married == -9] <- 0
dat$treatment_binary <- ifelse(dat$holy_endorser==1 | dat$secular_endorser==1, 1, 0)
dat$treatment_suicide <- ifelse(dat$treatment == 1,1,0) 

## select all possible covariates and outcome at first
dat <- dat %>% 
  select(q30_combined, treatment_binary, treatment_suicide, holy_endorser, secular_endorser, respondent_muslim, respondent_male, enumerator_muslim, enumerator_male, q1_age, q2_male, q3_kamba, q3_kikuyu, q3_luo, q3_somali, q3_other, q4_religion_christian, q4_religion_muslim, q5_convert, q6_years_married, q6_married, q7_has_job, q8_income_last_month, q8_has_income, q9_chance_become_rich, q10_plans_to_vote, q11_govt_represents_interest, q12_feels_on_loosing_side, q13_a_knows_emigrant, q13_b_knows_emigrant_som_eri, q15_witnesses_interrel_violence, q16_witnesses_viol_musl_govt, q17_friend_family_died, q18_lost_job, q18_arrested, q18_house_of_worship_raided, q18_stopped_speaking_parents, q18_love_problems, q18_friend_left_country, q18_sum, q19_rlts_mother, q19_not_raised_by_mom, q19_mother_dead, q20_rlts_father, q20_not_raised_by_father, q20_father_dead, q21_respect_friends_family, q22_gender, q22_religion, q22_tribal, q22_national, q22_youth, q23_religious_attendance, q23_frequents_house_worship, q24_deontology, q25_payment_acceptable, q26_payment_ethical, q27_radicalization, q33_distance)

## check for missings
vis_miss(dat)

## following variables seems problematic, i.e., shouldn't impute at this level
table(is.na(dat$q13_a_knows_emigrant))
table(is.na(dat$q19_rlts_mother))
table(is.na(dat$q20_rlts_father))
table(is.na(dat$q23_religious_attendance))

## remove problematic vars
dat <- dat %>% select(-q13_a_knows_emigrant, -q19_rlts_mother, -q20_rlts_father, -q23_religious_attendance)

## check missing again:
vis_miss(dat) 

## remove one missing treatment indicator (can't be imputed)
dat <- dat[which(dat$holy_endorser>-1),]

## impute rest of missings with mean 
for(i in 1:ncol(dat)){
  dat[is.na(dat[,i]), i] <- mean(dat[,i], na.rm = TRUE)
}
rm(i)

## 
dat <- dat[complete.cases(dat), ]

#-----#
# OLS #
#-----#

vars <- c("q2_male","respondent_muslim","q1_age","q5_convert","q6_married","q6_years_married",
          "q7_has_job","q8_has_income","q8_income_last_month","q3_kamba","q3_kikuyu","q3_luo",
          "q3_somali","enumerator_muslim","enumerator_male")

lm_endorser <- lm(paste("treatment_binary ~ ", paste(vars, collapse=" + ")), data = dat)

lm_suicide <- lm(paste("treatment_suicide ~ ", paste(vars, collapse=" + ")), data = dat)


stargazer(lm_endorser, lm_suicide,
          keep = vars,
          covariate.labels = c("Male","Muslim","Age","Convert","Married","Years married","Employed",
                               "Any income", "Income (KES)","Tribe:Kamba","Tribe:Kikuyu","Tribe:Luo",
                               "Tribe:Somali","Enumerator:Muslim","Enumerator:Male"),
          keep.stat = c("n", "f"),
          digits = 3,
          dep.var.caption = "",
          dep.var.labels = c("Explicit source", "No suicide norm"),
          column.labels = c("",""),
          label = "tab:main",
          title = "Regression of treatment on pre-treatment covariates",
          out = "PUT YOUR FILE PATH HERE")

